import os
from BasicLibraries import *
import get_data as gd
import regression as rg
from sample_description import get_summary_statistics_after_matching, get_sample_contribution,get_policy_variables
import reg_0 as reg0
import reg_1A_BRD as reg1A
import reg_2_time as reg2
import reg_5_EAR as reg5
# Append the MacTeX binary directory to PATH for this process.
# NOTE(review): presumably needed so that a LaTeX executable can be found by
# code run later in this script (e.g. LaTeX-rendered figures) — confirm.
os.environ["PATH"] += os.pathsep+'/Library/TeX/texbin'

'''
This script reproduces the results of the main text and the SI.
It requires access to COMPUSTAT, Refinitiv and TruCost.
The data are merged in the "get_data.py" script, which should be adapted to
read the response diversity data directly from "rd_102024.csv".
'''

# Flag passed as `years_FE` to the data merge and forwarded to every
# regression routine called in this script.
# NOTE(review): presumably toggles year fixed effects — confirm in get_data.py.
YFE = True

#%% Merge all datasets
# Build the analysis panel from the raw sources and cache everything in a
# pickle under local_data/. Set `make_data` to False to skip the rebuild.
make_data =  True
if make_data:
    sdgs = [6,7,9,11,12,13,14,15]
    sdgs_options_name =  'Environmental SDGs'
    diversification_type = 'entropy'
    #=== Accounting
    comp, db, IMR, esg, tc,  EBITDA_AR, EBIT_AR = gd.get_control_factors()
    #=== Merge with behaviour
    initiatives, missing_sdgs, initiatives_sdgs, initiatives_to_remove = gd.get_initiatives(sdgs)
    dt, dummies = gd.merge_with_behavioural_data(
        initiatives, initiatives_sdgs, missing_sdgs, sdgs,
        initiatives_to_remove, comp, db, IMR, esg, tc,
        EBITDA_AR,
        diversification_measure=diversification_type,
        years_FE=YFE,
    )
    # Log-transform the two asset columns and rescale the lagged one.
    # NOTE(review): `scale` comes from the BasicLibraries star import —
    # confirm which implementation it is.
    dt['at_usd'] = dt['at_usd'].apply(np.log)
    dt['at_usd_rw'] = dt['at_usd_rw'].apply(np.log)
    dt['at_lagged_rw'] = scale(dt['at_lagged_rw'])

    #=== Restricted working sample (dt_ws)
    dt_ws = dt.copy()
    # Firms with at least one observation from 2020 onwards ...
    post_gvkey = list(dt_ws[dt_ws.rfyear >= 2020].gvkey.unique())
    # ... or with more than 3 observations before 2020.
    idx = dt_ws[dt_ws.rfyear < 2020][['gvkey', 'rfyear']].groupby('gvkey').count()
    idx = list(idx[idx > 3].dropna().index)
    dt_ws = dt_ws[dt_ws.gvkey.isin(np.unique(post_gvkey+idx))]
    # Explicit copy: new columns are assigned below, and writing into a
    # filtered slice triggers pandas' chained-assignment warning.
    dt_ws = dt_ws[dt_ws.total_effort > 1].copy()
    # Emission intensities: emissions columns divided by sales.
    dt_ws['Emission_intensity_fut_tot'] = dt_ws['DirectControl_fut_tot']/dt_ws['sale_usd']
    dt_ws['Emission_intensity'] = dt_ws['DirectControl']/dt_ws['sale_usd']

    # Store the dummy columns as integers in both the full and restricted panels.
    dt_ws[dummies] = dt_ws[dummies].astype(int)
    dt[dummies] = dt[dummies].astype(int)

    # Cache all intermediate and final objects; the order of this list must
    # match the unpacking order used when the pickle is loaded back.
    # The context manager guarantees the file is flushed and closed.
    with open('local_data/dataset_for_paper_'+diversification_type+'_070723.pckl', 'wb') as dataset_file:
        pickle.dump(
            [comp, db, IMR, esg, tc, EBITDA_AR,  EBIT_AR, initiatives,
             missing_sdgs, initiatives_sdgs, initiatives_to_remove,
             dt, dt_ws, dummies],
            dataset_file,
        )

#%%
# Reload the cached dataset so the cells below can run without rebuilding it.
sdgs = [6,7,9,11,12,13,14,15]
sdgs_options_name =  'Environmental SDGs'
# Change the trailing index to select another diversification measure; a
# pickle with the matching name must exist in local_data/.
diversification_type = ['entropy', 'entropy_segments', 'simpson'][0]
# NOTE: unpickling can execute arbitrary code — only load files produced by
# this project. The context manager closes the file handle after reading.
with open('local_data/dataset_for_paper_'+diversification_type+'_070723.pckl', 'rb') as dataset_file:
    (comp, db, IMR, esg, tc,  EBITDA_AR, EBIT_AR, initiatives, missing_sdgs,
     initiatives_sdgs, initiatives_to_remove, dt, dt_ws, dummies) = pickle.load(dataset_file)


#%% Get the statistics of the sample to show in the SI
# Each helper receives the restricted working sample; they run in this order.
for _describe_sample in (
    get_summary_statistics_after_matching,
    get_sample_contribution,
    get_policy_variables,
):
    _describe_sample(dt_ws)

#%% BRD-Number of initiatives association
# Runs on the full panel `dt`, not on the restricted sample `dt_ws`.
reg_res = rg.make_effort_BRD_ass(dt, dummies)

#%% Association with assets characteristics
res_bar0, tab_reg0 = reg0.estimate_BRD_assets(
    dt_ws,
    dummies,
    YFE,
    sdgs_options_name,
)

#%% Main regression
(
    res_bar1A,
    list_of_models1A,
    list_of_data1A,
    tab_reg1A,
) = reg1A.estimate_effect_of_BRD(
    dt_ws,
    dummies,
    YFE,
    sdgs_options_name,
    diversification_type,
)

#%% Main regression in time
(
    res_bar2,
    list_of_models2,
    list_of_data2,
    tab_reg2,
) = reg2.estimate_effect_of_BRD_in_time(
    dt_ws,
    dummies,
    YFE,
    sdgs_options_name,
    diversification_type,
)


#%% Earnings at risk
# Takes the raw inputs rather than the merged panel; this main-text run
# passes EBITDA_AR.
res_bar5, list_of_data5 = reg5.estimate_effort_EAR(
    comp,
    db,
    IMR,
    esg,
    tc,
    sdgs,
    EBITDA_AR,
    dummies,
    YFE,
    sdgs_options_name,
    diversification_type,
)


#%%
#================================#
#========= Plots for SI =========#
#================================#
# NOTE: the two cells below rebind res_bar2/list_of_models2/list_of_data2/
# tab_reg2 and res_bar5/list_of_data5, replacing the main-text results
# computed earlier in this script.
#%% Un-standardised coefficients in time
(
    res_bar2,
    list_of_models2,
    list_of_data2,
    tab_reg2,
) = reg2.estimate_effect_of_BRD_in_time(
    dt_ws,
    dummies,
    YFE,
    sdgs_options_name,
    diversification_type,
    standardise_coefficients=False,
)

#%% Earnings at risk
# Same call as the main-text run, but with EBIT_AR in place of EBITDA_AR
# and the SI flag switched on.
res_bar5, list_of_data5 = reg5.estimate_effort_EAR(
    comp,
    db,
    IMR,
    esg,
    tc,
    sdgs,
    EBIT_AR,
    dummies,
    YFE,
    sdgs_options_name,
    diversification_type,
    SI=True,
)

